################################################################################# # # MEAN CUMULATIVE COUNT # # Last updated July, 2025. ################################################################################# ###### NOTE: The code replies on the function "crprep" which came from the "mstate" package but it worked perfect on R 3.5.X or eralier versions. With later R version, it runs without any error but the results are not as expected. So please use R 3.5.X version from https://cran.r-project.org/bin/windows/base/old/ ################################################################################# ################################################################################# # R code to accompany: # # Dong H, Robison LL, Leisenring WM, Martin LJ, Armstrong GT, Yasui Y. (in press). # Estimating the burden of recurrent events in the presence of competing risks: # The method of mean cumulative count. American Journal of Epidemiology. # # NOTE:--------------------------------------------------------------------------# # This code is to calculate MCC through the Sum of Cumualtive Incidence (SCI), with the option for 95% confidence interval. # -------------------------------------------------------------------------------# # # R version 3.2.4 # Contact: Qi Liu, ql3@ualberta.ca ############ ------------- PARAMETERS -------------- ######### ############----- id: Participants's ID ############----- time: Follow up time of participant to interest/competing-risk event/censoring ############----- cause: Event of interest is coded as 1; competing-risk event as 2; censoring as 0 ############----- Tstart: Vector with start time of follow-up. Default is 0 (no left truncation). ############----- type: Character string specifying the type of calcultion. # Possible values are "MCC" (MCC calculation by equation) and "SCI" (sum of cumulative incidence). ################################################################################# library("mstate") ###cumulative incidence that can be used for both left truncation and/or right censoring library(cmprsk) ###cumulative incidence for right censoring only ### These two packages are needed to fill in NA with previous non-missing value ### library(dplyr) #library(DataCombine) #FillDown: Fills in missing (NA) values with the previous non-missing value from "DataCombine" package. not in R anu more. library(zoo) ### Revised version of function "crprep" from "mstate" package--Found a bug and contact the author of "mstate". The author sent me the revised version of this function and the package will be updated by the author later. source("R:\\Biostatistics\\Biostatistics\\Common\\Qi\\webpage\\MCC\\crprep_Revised.R") ################### This is the main function to run ################### MCC=function(id,time,cause,Tstart=0,type="SCI",c_interval=FALSE) { if(type=="MCC"){ outdata=MCC_eq(id,time,cause) } if(type=="SCI" & !c_interval){ outdata=SCI(id,time,cause,Tstart) } if(type=="SCI" & c_interval){ outdata=SCI_95CI(id,time,cause,Tstart) } return(outdata) } ################### Calculate the Sum of Cumulative Incidence ########## SCI=function(id,time,cause,Tstart) { ### if the number of event 1 is 0, then no need to run, but give MCC =0; if(sum(cause==1)==0){ MCC.final=matrix(c(20,0),nrow=1) colnames(MCC.final)=c("time","SumCIs") } if(sum(cause==1)>0){ if(max(Tstart)==0) Tstart=rep(0,length(time)) ### By default it is a value 0, need to make it a vector of the same lengnth ### if event time is at cohrot entry, then add a small number to make them different, since in coutning process (to,t1] where t1>t0. time[time==Tstart]=Tstart[time==Tstart]+exp(-13) #### one person ends with either 2 (competing risk) or 0 (censoring), cannot have both 2 and 0. ### if one person had an event and then censoring or competing risk on the same date, put event first. indata=data.frame(id,time,cause,Tstart) indata$cause1=indata$cause indata$cause1[indata$cause==2]=-1 ii=order(indata$id,indata$time,-indata$cause1) indata=indata[ii,] ## Check whether Tstart is needed. This indicator will be used in SCI fucntion. calc.trunc <- any(Tstart[!is.na(Tstart)] != 0) indata$first <- ave(indata$time, indata$id, FUN = seq_along) M=max(indata$first) #maximum numer of events ### Take the last row so we know the maximum number of events per person event.number=do.call(rbind, lapply(split(indata, indata$id), tail, 1))[,c("id","first")] colnames(event.number)=c("id","maxE") alldata=merge(indata,event.number,by.x="id",by.y="id") #### make data for cumulative incidence, with dimension M*N data.MCC=NULL for(i in 1:M){ if(i==1){ data_temp=alldata[alldata$first==1,] data_temp$m_event=i data.MCC=rbind(data.MCC,data_temp) } if(i>1){ ## for those with i or more records, take the ith record the_ith=alldata[alldata$first==i,] ##for those with 14,000 individuals and hence bootstrap for 95% CI takes long to run #### smn=read.csv("R:\\Biostatistics\\Biostatistics\\Common\\Qi\\webpage\\MCC\\smndata.csv") smn[1:2,] table(smn$SMNcat) res_smn=MCC(id=smn$ccssid, time=smn$timesmn,cause=smn$SMNcat,type="SCI") ## no 95% CI res_smn95CI=MCC(id=smn$ccssid, time=smn$timesmn,cause=smn$SMNcat,type="SCI",c_interval=TRUE) ##with 95% CI.1.5hrs res_smn95CI=res_smn95CI$MCC_CI plot(c(0,35),c(0,0.5),type="n",xlab="Years from DX",ylab="MCC") lines(res_smn95CI$time,res_smn95CI$SumCIs,type="s") lines(res_smn95CI$time,res_smn95CI$low,type="s",col=4) lines(res_smn95CI$time,res_smn95CI$up,type="s",col=4) ########## The "crprep" gave different results when R versions are different. Currently my tests showed that it ran as expected in R 3.5.X and 4.2.2 (but not 4.4.0). Users need to test the run whenever a new R version is used. Use this dataset to test: A correct version would give burden=1.3 at the last time point. test_data=read.csv("R:\\Biostatistics\\Biostatistics\\Common\\Qi\\webpage\\MCC\\test_data.csv") res_test=MCC(id=test_data$id, time=test_data$timebirth,cause=test_data$status15,Tstart=test_data$astart,type="SCI")